import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
import sys
# Load the Mumbai <-> Bengaluru fare listing from a hard-coded local Windows
# path and display it (the preview below contains flights in both directions).
bom_blr_data = pd.read_csv(r"C:\Users\jki\Downloads\flight_data_BOM_BLR.csv")
bom_blr_data
FlightName | FlightCode | DepartingCity | DepartingTime | ArrivingCity | ArrivingTime | Duration | Price | |
---|---|---|---|---|---|---|---|---|
0 | Air India | AI 621 | Mumbai | 03:55 | Bengaluru | 05:50 | 01 h 55 m | 2,307 |
1 | AirAsia | I5 670 | Mumbai | 19:55 | Bengaluru | 21:45 | 01 h 50 m | 2,773 |
2 | AirAsia | I5 2992 | Mumbai | 23:55 | Bengaluru | 01:45\r\n+ 1 DAY | 01 h 50 m | 2,773 |
3 | IndiGo | 6E 5388 | Mumbai | 21:30 | Bengaluru | 23:15 | 01 h 45 m | 2,839 |
4 | Akasa Air | QP 1103 | Mumbai | 00:45 | Bengaluru | 02:20 | 01 h 35 m | 3,005 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
85 | Vistara | UK 840 | Bengaluru | 21:35 | Mumbai | 23:20 | 01 h 45 m | 11,038 |
86 | Vistara | UK 850 | Bengaluru | 19:55 | Mumbai | 21:35 | 01 h 40 m | 11,934 |
87 | Vistara | UK 864 | Bengaluru | 19:00 | Mumbai | 20:35 | 01 h 35 m | 14,596 |
88 | Vistara | UK 864 | Bengaluru | 19:00 | Mumbai | 20:35 | 01 h 35 m | 14,596 |
89 | Air India | AI 642 | Bengaluru | 21:25 | Mumbai | 23:20 | 01 h 55 m | 20,581 |
90 rows × 8 columns
# (rows, columns) of the Mumbai <-> Bengaluru frame.
bom_blr_data.shape
(90, 8)
# Column dtypes and non-null counts; every column, including Price, is read as object.
bom_blr_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 90 entries, 0 to 89 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 FlightName 90 non-null object 1 FlightCode 90 non-null object 2 DepartingCity 90 non-null object 3 DepartingTime 90 non-null object 4 ArrivingCity 90 non-null object 5 ArrivingTime 90 non-null object 6 Duration 90 non-null object 7 Price 90 non-null object dtypes: object(8) memory usage: 5.8+ KB
# Summary of the (all-object) columns: count / unique / top / freq per column.
bom_blr_data.describe()
FlightName | FlightCode | DepartingCity | DepartingTime | ArrivingCity | ArrivingTime | Duration | Price | |
---|---|---|---|---|---|---|---|---|
count | 90 | 90 | 90 | 90 | 90 | 90 | 90 | 90 |
unique | 5 | 70 | 2 | 58 | 2 | 61 | 9 | 53 |
top | IndiGo | 6E 5124 | Bengaluru | 09:20 | Mumbai | 11:35 | 01 h 50 m | 3,005 |
freq | 39 | 3 | 46 | 3 | 46 | 3 | 21 | 5 |
# Load the New Delhi <-> Bengaluru fare listing from a hard-coded local Windows
# path and display it (the preview below contains flights in both directions).
del_blr_data = pd.read_csv(r"C:\Users\jki\Downloads\flight_data_DEL_BLR.csv")
del_blr_data
FlightName | FlightCode | DepartingCity | DepartingTime | ArrivingCity | ArrivingTime | Duration | Price | |
---|---|---|---|---|---|---|---|---|
0 | Akasa Air | QP 1409 | New Delhi | 23:00 | Bengaluru | 01:45\n+ 1 DAY | 02 h 45 m | 5,230 |
1 | AirAsia | I5 740 | New Delhi | 07:50 | Bengaluru | 10:40 | 02 h 50 m | 5,235 |
2 | AirAsia | I5 1534 | New Delhi | 22:55 | Bengaluru | 01:35\n+ 1 DAY | 02 h 40 m | 5,235 |
3 | IndiGo | 6E 2067 | New Delhi | 03:55 | Bengaluru | 06:45 | 02 h 50 m | 5,236 |
4 | IndiGo | 6E 6612 | New Delhi | 05:50 | Bengaluru | 08:35 | 02 h 45 m | 5,236 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
90 | Air India | AI 808 | Bengaluru | 21:00 | New Delhi | 23:55 | 02 h 55 m | 10,711 |
91 | Vistara | UK 816 | Bengaluru | 11:30 | New Delhi | 14:10 | 02 h 40 m | 10,774 |
92 | Vistara | UK 816 | Bengaluru | 11:30 | New Delhi | 14:10 | 02 h 40 m | 10,774 |
93 | IndiGo | 6E 2339 | Bengaluru | 00:30 | New Delhi | 03:20 | 02 h 50 m | 12,445 |
94 | Vistara | UK 808 | Bengaluru | 08:00 | New Delhi | 10:35 | 02 h 35 m | 13,399 |
95 rows × 8 columns
# (rows, columns) of the New Delhi <-> Bengaluru frame.
del_blr_data.shape
(95, 8)
# Column dtypes and non-null counts; every column, including Price, is read as object.
del_blr_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 95 entries, 0 to 94 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 FlightName 95 non-null object 1 FlightCode 95 non-null object 2 DepartingCity 95 non-null object 3 DepartingTime 95 non-null object 4 ArrivingCity 95 non-null object 5 ArrivingTime 95 non-null object 6 Duration 95 non-null object 7 Price 95 non-null object dtypes: object(8) memory usage: 6.1+ KB
# Summary of the (all-object) columns: count / unique / top / freq per column.
del_blr_data.describe()
FlightName | FlightCode | DepartingCity | DepartingTime | ArrivingCity | ArrivingTime | Duration | Price | |
---|---|---|---|---|---|---|---|---|
count | 95 | 95 | 95 | 95 | 95 | 95 | 95 | 95 |
unique | 6 | 75 | 2 | 63 | 2 | 69 | 10 | 40 |
top | IndiGo | 6E 6612 | New Delhi | 08:00 | Bengaluru | 20:45 | 02 h 45 m | 5,354 |
freq | 36 | 3 | 48 | 6 | 48 | 3 | 26 | 11 |
# Load the New Delhi <-> Mumbai fare listing from a hard-coded local Windows
# path and display it (the preview below contains flights in both directions).
del_bom_data = pd.read_csv(r"C:\Users\jki\Downloads\flight_data_DEL_BOM.csv")
del_bom_data
FlightName | FlightCode | DepartingCity | DepartingTime | ArrivingCity | ArrivingTime | Duration | Price | |
---|---|---|---|---|---|---|---|---|
0 | Akasa Air | QP 1719 | New Delhi | 09:00 | Mumbai | 10:55 | 01 h 55 m | 4,758 |
1 | Akasa Air | QP 1411 | New Delhi | 10:55 | Mumbai | 13:05 | 02 h 10 m | 4,758 |
2 | Akasa Air | QP 1128 | New Delhi | 16:00 | Mumbai | 18:10 | 02 h 10 m | 4,758 |
3 | IndiGo | 6E 2009 | New Delhi | 01:00 | Mumbai | 03:00 | 02 h | 4,780 |
4 | IndiGo | 6E 2112 | New Delhi | 05:30 | Mumbai | 07:35 | 02 h 05 m | 4,780 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
95 | Vistara | UK 902 | Mumbai | 15:45 | New Delhi | 18:10 | 02 h 25 m | 10,539 |
96 | Vistara | UK 910 | Mumbai | 17:25 | New Delhi | 19:35 | 02 h 10 m | 10,539 |
97 | Vistara | UK 996 | Mumbai | 18:30 | New Delhi | 20:40 | 02 h 10 m | 10,539 |
98 | Vistara | UK 950 | Mumbai | 21:55 | New Delhi | 00:10\n+ 1 DAY | 02 h 15 m | 10,539 |
99 | Vistara | UK 986 | Mumbai | 22:50 | New Delhi | 01:00\n+ 1 DAY | 02 h 10 m | 10,539 |
100 rows × 8 columns
# (rows, columns) of the New Delhi <-> Mumbai frame.
del_bom_data.shape
(100, 8)
# Column dtypes and non-null counts; every column, including Price, is read as object.
del_bom_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 100 entries, 0 to 99 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 FlightName 100 non-null object 1 FlightCode 100 non-null object 2 DepartingCity 100 non-null object 3 DepartingTime 100 non-null object 4 ArrivingCity 100 non-null object 5 ArrivingTime 100 non-null object 6 Duration 100 non-null object 7 Price 100 non-null object dtypes: object(8) memory usage: 6.4+ KB
# Summary of the (all-object) columns: count / unique / top / freq per column.
del_bom_data.describe()
FlightName | FlightCode | DepartingCity | DepartingTime | ArrivingCity | ArrivingTime | Duration | Price | |
---|---|---|---|---|---|---|---|---|
count | 100 | 100 | 100 | 100 | 100 | 100 | 100 | 100 |
unique | 5 | 80 | 2 | 61 | 2 | 72 | 9 | 15 |
top | IndiGo | AI 864 | New Delhi | 09:00 | Mumbai | 01:00\n+ 1 DAY | 02 h 10 m | 4,780 |
freq | 52 | 3 | 50 | 4 | 50 | 3 | 30 | 37 |
# Load the New Delhi <-> Kolkata fare listing from a hard-coded local Windows
# path and display it (the preview below contains flights in both directions).
del_ccu_data = pd.read_csv(r"C:\Users\jki\Downloads\flight_data_DEL_CCU.csv")
del_ccu_data
FlightName | FlightCode | DepartingCity | DepartingTime | ArrivingCity | ArrivingTime | Duration | Price | |
---|---|---|---|---|---|---|---|---|
0 | IndiGo | 6E 2284 | New Delhi | 05:25 | Kolkata | 07:55 | 02 h 30 m | 5,408 |
1 | Vistara | UK 747 | New Delhi | 06:15 | Kolkata | 08:25 | 02 h 10 m | 5,408 |
2 | IndiGo | 6E 5219 | New Delhi | 06:25 | Kolkata | 08:35 | 02 h 10 m | 5,408 |
3 | IndiGo | 6E 6182 | New Delhi | 15:55 | Kolkata | 18:10 | 02 h 15 m | 5,408 |
4 | IndiGo | 6E 2415 | New Delhi | 17:45 | Kolkata | 20:00 | 02 h 15 m | 5,408 |
5 | IndiGo | 6E 2415 | New Delhi | 17:45 | Kolkata | 20:00 | 02 h 15 m | 5,408 |
6 | IndiGo | 6E 2415 | New Delhi | 17:45 | Kolkata | 20:00 | 02 h 15 m | 5,408 |
7 | IndiGo | 6E 2057 | New Delhi | 19:00 | Kolkata | 21:05 | 02 h 05 m | 5,408 |
8 | IndiGo | 6E 2057 | New Delhi | 19:00 | Kolkata | 21:05 | 02 h 05 m | 5,408 |
9 | SpiceJet | SG 8251 | New Delhi | 19:30 | Kolkata | 21:40 | 02 h 10 m | 5,408 |
10 | IndiGo | 6E 2517 | New Delhi | 20:30 | Kolkata | 22:40 | 02 h 10 m | 5,408 |
11 | IndiGo | 6E 6005 | New Delhi | 22:50 | Kolkata | 01:00\n+ 1 DAY | 02 h 10 m | 5,408 |
12 | IndiGo | 6E 2603 | New Delhi | 23:50 | Kolkata | 02:05\n+ 1 DAY | 02 h 15 m | 5,408 |
13 | Vistara | UK 737 | New Delhi | 15:45 | Kolkata | 18:05 | 02 h 20 m | 5,440 |
14 | Vistara | UK 737 | New Delhi | 15:45 | Kolkata | 18:05 | 02 h 20 m | 5,440 |
15 | Air India | AI 767 | New Delhi | 12:20 | Kolkata | 14:30 | 02 h 10 m | 5,492 |
16 | Air India | AI 764 | New Delhi | 17:05 | Kolkata | 19:15 | 02 h 10 m | 5,492 |
17 | Air India | AI 764 | New Delhi | 17:05 | Kolkata | 19:15 | 02 h 10 m | 5,492 |
18 | Air India | AI 762 | New Delhi | 21:00 | Kolkata | 23:30 | 02 h 30 m | 5,492 |
19 | IndiGo | 6E 2788 | New Delhi | 02:55 | Kolkata | 05:10 | 02 h 15 m | 5,849 |
20 | IndiGo | 6E 2788 | New Delhi | 02:55 | Kolkata | 05:10 | 02 h 15 m | 5,849 |
21 | IndiGo | 6E 282 | New Delhi | 10:10 | Kolkata | 12:10 | 02 h | 5,849 |
22 | IndiGo | 6E 6557 | New Delhi | 13:00 | Kolkata | 15:00 | 02 h | 5,849 |
23 | Vistara | UK 707 | New Delhi | 17:35 | Kolkata | 19:35 | 02 h | 5,881 |
24 | SpiceJet | SG 8263 | New Delhi | 07:10 | Kolkata | 09:30 | 02 h 20 m | 5,986 |
25 | Air India | AI 401 | New Delhi | 06:55 | Kolkata | 09:00 | 02 h 05 m | 6,374 |
26 | Air India | AI 401 | New Delhi | 06:55 | Kolkata | 09:00 | 02 h 05 m | 6,374 |
27 | Vistara | UK 705 | New Delhi | 07:40 | Kolkata | 09:45 | 02 h 05 m | 8,474 |
28 | Vistara | UK 727 | New Delhi | 20:30 | Kolkata | 22:20 | 01 h 50 m | 10,469 |
29 | IndiGo | 6E 5213 | New Delhi | 09:00 | Kolkata | 11:05 | 02 h 05 m | 15,298 |
30 | IndiGo | 6E 5214 | Kolkata | 05:25 | New Delhi | 07:50 | 02 h 25 m | 6,020 |
31 | IndiGo | 6E 375 | Kolkata | 07:00 | New Delhi | 09:15 | 02 h 15 m | 6,020 |
32 | IndiGo | 6E 6183 | Kolkata | 18:55 | New Delhi | 21:10 | 02 h 15 m | 6,020 |
33 | IndiGo | 6E 898 | Kolkata | 19:35 | New Delhi | 21:55 | 02 h 20 m | 6,020 |
34 | IndiGo | 6E 2224 | Kolkata | 20:55 | New Delhi | 23:10 | 02 h 15 m | 6,020 |
35 | IndiGo | 6E 2224 | Kolkata | 20:55 | New Delhi | 23:10 | 02 h 15 m | 6,020 |
36 | IndiGo | 6E 2224 | Kolkata | 20:55 | New Delhi | 23:10 | 02 h 15 m | 6,020 |
37 | IndiGo | 6E 2716 | Kolkata | 21:45 | New Delhi | 00:05\n+ 1 DAY | 02 h 20 m | 6,020 |
38 | IndiGo | 6E 2716 | Kolkata | 21:45 | New Delhi | 00:05\n+ 1 DAY | 02 h 20 m | 6,020 |
39 | SpiceJet | SG 8265 | Kolkata | 22:20 | New Delhi | 00:30\n+ 1 DAY | 02 h 10 m | 6,020 |
40 | IndiGo | 6E 2746 | Kolkata | 23:45 | New Delhi | 02:00\n+ 1 DAY | 02 h 15 m | 6,020 |
41 | Air India | AI 763 | Kolkata | 06:55 | New Delhi | 09:15 | 02 h 20 m | 6,063 |
42 | Vistara | UK 778 | Kolkata | 15:15 | New Delhi | 17:50 | 02 h 35 m | 6,063 |
43 | Air India | AI 768 | Kolkata | 15:25 | New Delhi | 18:10 | 02 h 45 m | 6,063 |
44 | Air India | AI 768 | Kolkata | 15:25 | New Delhi | 18:10 | 02 h 45 m | 6,063 |
45 | Air India | AI 770 | Kolkata | 20:00 | New Delhi | 22:35 | 02 h 35 m | 6,063 |
46 | SpiceJet | SG 8373 | Kolkata | 10:50 | New Delhi | 13:10 | 02 h 20 m | 6,064 |
47 | SpiceJet | SG 8373 | Kolkata | 10:50 | New Delhi | 13:10 | 02 h 20 m | 6,064 |
48 | Vistara | UK 720 | Kolkata | 07:10 | New Delhi | 09:35 | 02 h 25 m | 6,095 |
49 | Vistara | UK 706 | Kolkata | 10:25 | New Delhi | 12:40 | 02 h 15 m | 6,095 |
50 | Vistara | UK 706 | Kolkata | 10:25 | New Delhi | 12:40 | 02 h 15 m | 6,095 |
51 | Vistara | UK 738 | Kolkata | 18:45 | New Delhi | 21:20 | 02 h 35 m | 6,095 |
52 | Vistara | UK 708 | Kolkata | 20:30 | New Delhi | 23:00 | 02 h 30 m | 6,095 |
53 | IndiGo | 6E 2513 | Kolkata | 08:30 | New Delhi | 10:55 | 02 h 25 m | 6,212 |
54 | IndiGo | 6E 2759 | Kolkata | 15:40 | New Delhi | 18:00 | 02 h 20 m | 6,212 |
55 | IndiGo | 6E 2345 | Kolkata | 10:10 | New Delhi | 12:30 | 02 h 20 m | 6,420 |
56 | IndiGo | 6E 2345 | Kolkata | 10:10 | New Delhi | 12:30 | 02 h 20 m | 6,420 |
57 | IndiGo | 6E 2358 | Kolkata | 11:40 | New Delhi | 14:00 | 02 h 20 m | 6,420 |
58 | Air India | AI 769 | Kolkata | 10:00 | New Delhi | 12:20 | 02 h 20 m | 8,100 |
59 | IndiGo | 6E 2516 | Kolkata | 16:45 | New Delhi | 19:05 | 02 h 20 m | 11,820 |
# (rows, columns) of the New Delhi <-> Kolkata frame.
del_ccu_data.shape
(60, 8)
# Column dtypes and non-null counts; every column, including Price, is read as object.
del_ccu_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 60 entries, 0 to 59 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 FlightName 60 non-null object 1 FlightCode 60 non-null object 2 DepartingCity 60 non-null object 3 DepartingTime 60 non-null object 4 ArrivingCity 60 non-null object 5 ArrivingTime 60 non-null object 6 Duration 60 non-null object 7 Price 60 non-null object dtypes: object(8) memory usage: 3.9+ KB
# Summary of the (all-object) columns: count / unique / top / freq per column.
del_ccu_data.describe()
FlightName | FlightCode | DepartingCity | DepartingTime | ArrivingCity | ArrivingTime | Duration | Price | |
---|---|---|---|---|---|---|---|---|
count | 60 | 60 | 60 | 60 | 60 | 60 | 60 | 60 |
unique | 4 | 46 | 2 | 40 | 2 | 44 | 10 | 18 |
top | IndiGo | 6E 2224 | New Delhi | 06:55 | Kolkata | 23:10 | 02 h 15 m | 5,408 |
freq | 32 | 3 | 30 | 3 | 30 | 3 | 15 | 13 |
# Load the New Delhi <-> Hyderabad fare listing from a hard-coded local Windows
# path and display it (the preview below contains flights in both directions).
del_hyd_data = pd.read_csv(r"C:\Users\jki\Downloads\flight_data_DEL_HYD.csv")
del_hyd_data
FlightName | FlightCode | DepartingCity | DepartingTime | ArrivingCity | ArrivingTime | Duration | Price | |
---|---|---|---|---|---|---|---|---|
0 | Akasa Air | QP 1406 | New Delhi | 12:45 | Hyderabad | 15:05 | 02 h 20 m | 4,548 |
1 | IndiGo | 6E 2461 | New Delhi | 01:55 | Hyderabad | 04:10 | 02 h 15 m | 4,750 |
2 | IndiGo | 6E 2337 | New Delhi | 05:40 | Hyderabad | 07:55 | 02 h 15 m | 4,750 |
3 | IndiGo | 6E 6203 | New Delhi | 06:30 | Hyderabad | 08:40 | 02 h 10 m | 4,750 |
4 | Air India | AI 560 | New Delhi | 07:10 | Hyderabad | 09:20 | 02 h 10 m | 4,750 |
5 | Air India | AI 560 | New Delhi | 07:10 | Hyderabad | 09:20 | 02 h 10 m | 4,750 |
6 | Air India | AI 560 | New Delhi | 07:10 | Hyderabad | 09:20 | 02 h 10 m | 4,750 |
7 | IndiGo | 6E 774 | New Delhi | 08:10 | Hyderabad | 10:20 | 02 h 10 m | 4,750 |
8 | IndiGo | 6E 774 | New Delhi | 08:10 | Hyderabad | 10:20 | 02 h 10 m | 4,750 |
9 | Air India | AI 542 | New Delhi | 09:30 | Hyderabad | 12:00 | 02 h 30 m | 4,750 |
10 | IndiGo | 6E 2005 | New Delhi | 10:10 | Hyderabad | 12:15 | 02 h 05 m | 4,750 |
11 | Air India | AI 522 | New Delhi | 11:05 | Hyderabad | 13:25 | 02 h 20 m | 4,750 |
12 | IndiGo | 6E 837 | New Delhi | 13:30 | Hyderabad | 15:45 | 02 h 15 m | 4,750 |
13 | IndiGo | 6E 2187 | New Delhi | 15:05 | Hyderabad | 17:20 | 02 h 15 m | 4,750 |
14 | IndiGo | 6E 2187 | New Delhi | 15:05 | Hyderabad | 17:20 | 02 h 15 m | 4,750 |
15 | Air India | AI 544 | New Delhi | 17:00 | Hyderabad | 19:10 | 02 h 10 m | 4,750 |
16 | IndiGo | 6E 5312 | New Delhi | 17:15 | Hyderabad | 19:25 | 02 h 10 m | 4,750 |
17 | IndiGo | 6E 5312 | New Delhi | 17:15 | Hyderabad | 19:25 | 02 h 10 m | 4,750 |
18 | IndiGo | 6E 2341 | New Delhi | 18:45 | Hyderabad | 21:00 | 02 h 15 m | 4,750 |
19 | IndiGo | 6E 605 | New Delhi | 19:45 | Hyderabad | 22:00 | 02 h 15 m | 4,750 |
20 | IndiGo | 6E 605 | New Delhi | 19:45 | Hyderabad | 22:00 | 02 h 15 m | 4,750 |
21 | IndiGo | 6E 2371 | New Delhi | 21:25 | Hyderabad | 23:40 | 02 h 15 m | 4,750 |
22 | Air India | AI 839 | New Delhi | 21:30 | Hyderabad | 23:45 | 02 h 15 m | 4,750 |
23 | Vistara | UK 879 | New Delhi | 17:35 | Hyderabad | 19:50 | 02 h 15 m | 4,873 |
24 | Vistara | UK 859 | New Delhi | 10:25 | Hyderabad | 12:35 | 02 h 10 m | 4,904 |
25 | Vistara | UK 899 | New Delhi | 14:45 | Hyderabad | 17:00 | 02 h 15 m | 4,925 |
26 | Vistara | UK 899 | New Delhi | 14:45 | Hyderabad | 17:00 | 02 h 15 m | 4,925 |
27 | SpiceJet | SG 8164 | New Delhi | 23:00 | Hyderabad | 01:00\n+ 1 DAY | 02 h | 4,981 |
28 | Vistara | UK 829 | New Delhi | 07:20 | Hyderabad | 09:30 | 02 h 10 m | 5,692 |
29 | Vistara | UK 871 | New Delhi | 20:35 | Hyderabad | 22:55 | 02 h 20 m | 6,343 |
30 | Akasa Air | QP 1407 | Hyderabad | 19:25 | New Delhi | 21:45 | 02 h 20 m | 4,979 |
31 | IndiGo | 6E 379 | Hyderabad | 05:05 | New Delhi | 07:25 | 02 h 20 m | 4,986 |
32 | IndiGo | 6E 491 | Hyderabad | 09:30 | New Delhi | 11:50 | 02 h 20 m | 4,986 |
33 | IndiGo | 6E 2171 | Hyderabad | 11:55 | New Delhi | 14:05 | 02 h 10 m | 4,986 |
34 | IndiGo | 6E 2063 | Hyderabad | 14:25 | New Delhi | 16:45 | 02 h 20 m | 4,986 |
35 | IndiGo | 6E 2063 | Hyderabad | 14:25 | New Delhi | 16:45 | 02 h 20 m | 4,986 |
36 | IndiGo | 6E 2063 | Hyderabad | 14:25 | New Delhi | 16:45 | 02 h 20 m | 4,986 |
37 | IndiGo | 6E 2003 | Hyderabad | 18:15 | New Delhi | 20:30 | 02 h 15 m | 4,986 |
38 | IndiGo | 6E 2003 | Hyderabad | 18:15 | New Delhi | 20:30 | 02 h 15 m | 4,986 |
39 | IndiGo | 6E 5605 | Hyderabad | 21:55 | New Delhi | 00:05\n+ 1 DAY | 02 h 10 m | 4,986 |
40 | IndiGo | 6E 6146 | Hyderabad | 22:45 | New Delhi | 01:00\n+ 1 DAY | 02 h 15 m | 4,986 |
41 | Air India | AI 543 | Hyderabad | 10:05 | New Delhi | 12:30 | 02 h 25 m | 5,052 |
42 | IndiGo | 6E 2011 | Hyderabad | 06:45 | New Delhi | 09:00 | 02 h 15 m | 5,210 |
43 | IndiGo | 6E 2342 | Hyderabad | 08:40 | New Delhi | 10:45 | 02 h 05 m | 5,210 |
44 | IndiGo | 6E 2342 | Hyderabad | 08:40 | New Delhi | 10:45 | 02 h 05 m | 5,210 |
45 | IndiGo | 6E 6606 | Hyderabad | 16:30 | New Delhi | 18:50 | 02 h 20 m | 5,210 |
46 | Vistara | UK 880 | Hyderabad | 13:15 | New Delhi | 15:35 | 02 h 20 m | 5,401 |
47 | Vistara | UK 880 | Hyderabad | 13:15 | New Delhi | 15:35 | 02 h 20 m | 5,401 |
48 | Vistara | UK 890 | Hyderabad | 17:40 | New Delhi | 19:55 | 02 h 15 m | 5,453 |
49 | Air India | AI 559 | Hyderabad | 06:15 | New Delhi | 08:45 | 02 h 30 m | 5,472 |
50 | Air India | AI 559 | Hyderabad | 06:15 | New Delhi | 08:45 | 02 h 30 m | 5,472 |
51 | Air India | AI 523 | Hyderabad | 14:10 | New Delhi | 16:35 | 02 h 25 m | 5,472 |
52 | Air India | AI 541 | Hyderabad | 16:25 | New Delhi | 18:50 | 02 h 25 m | 5,472 |
53 | Air India | AI 840 | Hyderabad | 20:50 | New Delhi | 23:20 | 02 h 30 m | 5,472 |
54 | SpiceJet | SG 160 | Hyderabad | 06:10 | New Delhi | 08:25 | 02 h 15 m | 5,629 |
55 | Vistara | UK 830 | Hyderabad | 10:10 | New Delhi | 12:30 | 02 h 20 m | 5,905 |
56 | Vistara | UK 830 | Hyderabad | 10:10 | New Delhi | 12:30 | 02 h 20 m | 5,905 |
57 | Vistara | UK 860 | Hyderabad | 07:00 | New Delhi | 08:55 | 01 h 55 m | 7,585 |
58 | Vistara | UK 870 | Hyderabad | 20:35 | New Delhi | 22:45 | 02 h 10 m | 7,606 |
# (rows, columns) of the New Delhi <-> Hyderabad frame.
del_hyd_data.shape
(59, 8)
# Column dtypes and non-null counts; every column, including Price, is read as object.
del_hyd_data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 59 entries, 0 to 58 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 FlightName 59 non-null object 1 FlightCode 59 non-null object 2 DepartingCity 59 non-null object 3 DepartingTime 59 non-null object 4 ArrivingCity 59 non-null object 5 ArrivingTime 59 non-null object 6 Duration 59 non-null object 7 Price 59 non-null object dtypes: object(8) memory usage: 3.8+ KB
# Summary of the (all-object) columns: count / unique / top / freq per column.
del_hyd_data.describe()
FlightName | FlightCode | DepartingCity | DepartingTime | ArrivingCity | ArrivingTime | Duration | Price | |
---|---|---|---|---|---|---|---|---|
count | 59 | 59 | 59 | 59 | 59 | 59 | 59 | 59 |
unique | 5 | 45 | 2 | 42 | 2 | 42 | 8 | 19 |
top | IndiGo | 6E 2063 | New Delhi | 07:10 | Hyderabad | 16:45 | 02 h 15 m | 4,750 |
freq | 29 | 3 | 30 | 3 | 30 | 3 | 19 | 22 |
# Make the two columns used by the plots and models numeric, for every route:
#   * Price arrives as text with thousands separators ("2,307") -> int
#   * DepartingHour is the "HH" part of the "HH:MM" DepartingTime -> int
for _route_frame in (bom_blr_data, del_blr_data, del_bom_data, del_ccu_data, del_hyd_data):
    _price_text = _route_frame["Price"].str.replace(",", "")
    _route_frame["Price"] = _price_text.astype(int)

    _hour_text = _route_frame["DepartingTime"].str.split(":").str[0]
    _route_frame["DepartingHour"] = _hour_text.astype(int)
# Pairwise scatter matrix for Mumbai <-> Bengaluru. pairplot only draws numeric
# columns, which (if the cells ran in file order) are Price and DepartingHour.
sns.pairplot(bom_blr_data, kind='scatter', height=3.5)
<seaborn.axisgrid.PairGrid at 0x1c29bba1510>
# Pairwise scatter matrix for New Delhi <-> Bengaluru. pairplot only draws numeric
# columns, which (if the cells ran in file order) are Price and DepartingHour.
sns.pairplot(del_blr_data, kind='scatter', height=3.5)
<seaborn.axisgrid.PairGrid at 0x1c29d070a10>
# Pairwise scatter matrix for New Delhi <-> Mumbai. pairplot only draws numeric
# columns, which (if the cells ran in file order) are Price and DepartingHour.
sns.pairplot(del_bom_data, kind='scatter', height=3.5)
<seaborn.axisgrid.PairGrid at 0x1c29bcb2790>
# Pairwise scatter matrix for New Delhi <-> Kolkata. pairplot only draws numeric
# columns, which (if the cells ran in file order) are Price and DepartingHour.
sns.pairplot(del_ccu_data, kind='scatter', height=3.5)
<seaborn.axisgrid.PairGrid at 0x1c29d117250>
# Pairwise scatter matrix for New Delhi <-> Hyderabad. pairplot only draws numeric
# columns, which (if the cells ran in file order) are Price and DepartingHour.
sns.pairplot(del_hyd_data, kind='scatter', height=3.5)
<seaborn.axisgrid.PairGrid at 0x1c29d798dd0>
# Mumbai <-> Bengaluru: fare against departure hour with a fitted regression line
# (grey points, black line).
_trend_style = {'color':'black'}
_point_style = {'color':'darkgrey'}
sns.lmplot(
    bom_blr_data,
    x='DepartingHour',
    y='Price',
    scatter_kws=_point_style,
    line_kws=_trend_style,
)
plt.title('DepartingHour vs Price')
Text(0.5, 1.0, 'DepartingHour vs Price')
# New Delhi <-> Bengaluru: fare against departure hour with a fitted regression
# line (grey points, black line).
_trend_style = {'color':'black'}
_point_style = {'color':'darkgrey'}
sns.lmplot(
    del_blr_data,
    x='DepartingHour',
    y='Price',
    scatter_kws=_point_style,
    line_kws=_trend_style,
)
plt.title('DepartingHour vs Price')
Text(0.5, 1.0, 'DepartingHour vs Price')
# New Delhi <-> Mumbai: fare against departure hour with a fitted regression line
# (grey points, black line).
_trend_style = {'color':'black'}
_point_style = {'color':'darkgrey'}
sns.lmplot(
    del_bom_data,
    x='DepartingHour',
    y='Price',
    scatter_kws=_point_style,
    line_kws=_trend_style,
)
plt.title('DepartingHour vs Price')
Text(0.5, 1.0, 'DepartingHour vs Price')
# New Delhi <-> Kolkata: fare against departure hour with a fitted regression line
# (grey points, black line).
_trend_style = {'color':'black'}
_point_style = {'color':'darkgrey'}
sns.lmplot(
    del_ccu_data,
    x='DepartingHour',
    y='Price',
    scatter_kws=_point_style,
    line_kws=_trend_style,
)
plt.title('DepartingHour vs Price')
Text(0.5, 1.0, 'DepartingHour vs Price')
# New Delhi <-> Hyderabad: fare against departure hour with a fitted regression
# line (grey points, black line).
_trend_style = {'color':'black'}
_point_style = {'color':'darkgrey'}
sns.lmplot(
    del_hyd_data,
    x='DepartingHour',
    y='Price',
    scatter_kws=_point_style,
    line_kws=_trend_style,
)
plt.title('DepartingHour vs Price')
Text(0.5, 1.0, 'DepartingHour vs Price')
# Mumbai <-> Bengaluru: mean fare per airline, most expensive first, as a bar chart.
_mean_fare = bom_blr_data.groupby('FlightName')['Price'].mean()
bom_blr_data_avg_price = _mean_fare.reset_index().sort_values(by='Price', ascending=False)
sns.barplot(data=bom_blr_data_avg_price, x='FlightName', y='Price')
plt.title('Barplot FlightName vs Avg. Price')
Text(0.5, 1.0, 'Barplot FlightName vs Avg. Price')
# New Delhi <-> Bengaluru: mean fare per airline, most expensive first, as a bar chart.
_mean_fare = del_blr_data.groupby('FlightName')['Price'].mean()
del_blr_data_avg_price = _mean_fare.reset_index().sort_values(by='Price', ascending=False)
sns.barplot(data=del_blr_data_avg_price, x='FlightName', y='Price')
plt.title('Barplot FlightName vs Avg. Price')
Text(0.5, 1.0, 'Barplot FlightName vs Avg. Price')
# New Delhi <-> Mumbai: mean fare per airline, most expensive first, as a bar chart.
_mean_fare = del_bom_data.groupby('FlightName')['Price'].mean()
del_bom_data_avg_price = _mean_fare.reset_index().sort_values(by='Price', ascending=False)
sns.barplot(data=del_bom_data_avg_price, x='FlightName', y='Price')
plt.title('Barplot FlightName vs Avg. Price')
Text(0.5, 1.0, 'Barplot FlightName vs Avg. Price')
# New Delhi <-> Kolkata: mean fare per airline, most expensive first, as a bar chart.
_mean_fare = del_ccu_data.groupby('FlightName')['Price'].mean()
del_ccu_data_avg_price = _mean_fare.reset_index().sort_values(by='Price', ascending=False)
sns.barplot(data=del_ccu_data_avg_price, x='FlightName', y='Price')
plt.title('Barplot FlightName vs Avg. Price')
Text(0.5, 1.0, 'Barplot FlightName vs Avg. Price')
# New Delhi <-> Hyderabad: mean fare per airline, most expensive first, as a bar chart.
_mean_fare = del_hyd_data.groupby('FlightName')['Price'].mean()
del_hyd_data_avg_price = _mean_fare.reset_index().sort_values(by='Price', ascending=False)
sns.barplot(data=del_hyd_data_avg_price, x='FlightName', y='Price')
plt.title('Barplot FlightName vs Avg. Price')
Text(0.5, 1.0, 'Barplot FlightName vs Avg. Price')
# Share of listed flights per airline, one pie chart per route.
# Each frame is paired with its title so a route cannot be drawn under the
# wrong heading.
_route_pies = (
    (bom_blr_data, 'Pie chart Bombay --> Bengaluru Flight'),
    (del_blr_data, 'Pie chart Delhi --> Bengaluru Flight'),
    (del_bom_data, 'Pie chart Delhi --> Bombay Flight'),
    (del_ccu_data, 'Pie chart Delhi --> Kolkata Flight'),
    # This title used to misspell the city as "Hydrabad".
    (del_hyd_data, 'Pie chart Delhi --> Hyderabad Flight'),
)
for _route_frame, _route_title in _route_pies:
    # size() counts rows per airline; autopct prints whole-number percentages.
    _route_frame.groupby("FlightName").size().plot(kind='pie', autopct='%1.0f%%', subplots=True)
    plt.title(_route_title)
    plt.show()
# Mumbai <-> Bengaluru: fare by departure hour, one line per arrival city.
# Rows sharing an hour are aggregated by lineplot (mean with an error band by default).
sns.lineplot(bom_blr_data, x='DepartingHour', y='Price', hue='ArrivingCity', marker='o')
<Axes: xlabel='DepartingHour', ylabel='Price'>
# New Delhi <-> Bengaluru: fare by departure hour, one line per arrival city.
# Rows sharing an hour are aggregated by lineplot (mean with an error band by default).
sns.lineplot(del_blr_data, x='DepartingHour', y='Price', hue='ArrivingCity', marker='o')
<Axes: xlabel='DepartingHour', ylabel='Price'>
# New Delhi <-> Mumbai: fare by departure hour, one line per arrival city.
# Rows sharing an hour are aggregated by lineplot (mean with an error band by default).
sns.lineplot(del_bom_data, x='DepartingHour', y='Price', hue='ArrivingCity', marker='o')
<Axes: xlabel='DepartingHour', ylabel='Price'>
# New Delhi <-> Kolkata: fare by departure hour, one line per arrival city.
# Rows sharing an hour are aggregated by lineplot (mean with an error band by default).
sns.lineplot(del_ccu_data, x='DepartingHour', y='Price', hue='ArrivingCity', marker='o')
<Axes: xlabel='DepartingHour', ylabel='Price'>
# New Delhi <-> Hyderabad: fare by departure hour, one line per arrival city.
# Rows sharing an hour are aggregated by lineplot (mean with an error band by default).
sns.lineplot(del_hyd_data, x='DepartingHour', y='Price', hue='ArrivingCity', marker='o')
<Axes: xlabel='DepartingHour', ylabel='Price'>
def _plot_cubic_price_fit(hours, prices, title, degree=3, points=200):
    """Scatter fares against departure hour and overlay a least-squares polynomial.

    The curve is evaluated only between the smallest and largest observed
    departure hour. The earlier hard-coded grid ``linspace(1, 30, n)`` left out
    hour 0 and extended the cubic to hours 24-30, which cannot occur because
    DepartingHour is the hour part of an HH:MM time; the extrapolated tail can
    swing far from the data and stretch the y axis.

    Parameters:
        hours: Series of integer departure hours (x values).
        prices: Series of integer fares (y values).
        title: Title shown above the plot.
        degree: Degree of the fitted polynomial (3 reproduces the original fit).
        points: Number of x positions at which the fitted curve is drawn.

    Returns:
        (model, grid): the fitted ``np.poly1d`` and the x grid it was drawn on.
    """
    fitted = np.poly1d(np.polyfit(hours, prices, degree))
    grid = np.linspace(hours.min(), hours.max(), points)
    plt.scatter(hours, prices)
    plt.plot(grid, fitted(grid))
    plt.title(title)
    plt.show()
    return fitted, grid


# One fit per route. `model` and `line` are rebound each time, so after this
# cell they describe the last route only, exactly as before.
bom_blr_x = bom_blr_data['DepartingHour']
bom_blr_y = bom_blr_data['Price']
model, line = _plot_cubic_price_fit(
    bom_blr_x, bom_blr_y, 'Polynomial Regression [Bombay --> Bengaluru]')

del_blr_x = del_blr_data['DepartingHour']
del_blr_y = del_blr_data['Price']
model, line = _plot_cubic_price_fit(
    del_blr_x, del_blr_y, 'Polynomial Regression [Delhi --> Bengaluru]')

del_bom_x = del_bom_data['DepartingHour']
del_bom_y = del_bom_data['Price']
model, line = _plot_cubic_price_fit(
    del_bom_x, del_bom_y, 'Polynomial Regression [Delhi --> Bombay]')

del_ccu_x = del_ccu_data['DepartingHour']
del_ccu_y = del_ccu_data['Price']
model, line = _plot_cubic_price_fit(
    del_ccu_x, del_ccu_y, 'Polynomial Regression [Delhi --> Kolkata]')

del_hyd_x = del_hyd_data['DepartingHour']
del_hyd_y = del_hyd_data['Price']
# This title used to misspell the city as "Hydrabad".
model, line = _plot_cubic_price_fit(
    del_hyd_x, del_hyd_y, 'Polynomial Regression [Delhi --> Hyderabad]')
# Decision tree predicting the arrival city of a Mumbai <-> Bengaluru flight.
# Encode the airline name as an integer code so the tree can consume it.
# NOTE: the column is overwritten in place, so later cells see codes, not names,
# and running this cell twice maps the codes to NaN.
bom_blr_d = {'Air India':0, 'AirAsia':1, 'IndiGo':2, 'Akasa Air':3, 'Vistara':4}
bom_blr_data['FlightName'] = bom_blr_data['FlightName'].map(bom_blr_d)
# Encode the arrival city (the classification target) as 0/1.
bom_blr_d = {'Bengaluru':0, 'Mumbai':1}
bom_blr_data['ArrivingCity'] = bom_blr_data['ArrivingCity'].map(bom_blr_d)
# The target must not be among the inputs. With 'ArrivingCity' in this list the
# tree consisted of a single split on the label itself and learned nothing.
features = ['DepartingHour', 'Price', 'FlightName']
bom_blr_x = bom_blr_data[features]
bom_blr_y = bom_blr_data['ArrivingCity']
# Fixed seed so the fitted tree is the same on every run.
bom_blr_dtree = DecisionTreeClassifier(random_state=0)
bom_blr_dtree = bom_blr_dtree.fit(bom_blr_x, bom_blr_y)
tree.plot_tree(bom_blr_dtree, feature_names=features)
[Text(0.5, 0.75, 'ArrivingCity <= 0.5\ngini = 0.5\nsamples = 90\nvalue = [44, 46]'), Text(0.25, 0.25, 'gini = 0.0\nsamples = 44\nvalue = [44, 0]'), Text(0.75, 0.25, 'gini = 0.0\nsamples = 46\nvalue = [0, 46]')]
# Decision tree predicting the arrival city of a New Delhi <-> Bengaluru flight.
# Encode the airline name as an integer code so the tree can consume it.
# NOTE: the column is overwritten in place, so later cells see codes, not names,
# and running this cell twice maps the codes to NaN.
del_blr_d = {'Akasa Air':0, 'AirAsia':1, 'IndiGo':2, 'Air India':3, 'SpiceJet':4, 'Vistara':5}
del_blr_data['FlightName'] = del_blr_data['FlightName'].map(del_blr_d)
# Encode the arrival city (the classification target) as 0/1.
del_blr_d = {'Bengaluru':0, 'New Delhi':1}
del_blr_data['ArrivingCity'] = del_blr_data['ArrivingCity'].map(del_blr_d)
# The target must not be among the inputs. With 'ArrivingCity' in this list the
# tree consisted of a single split on the label itself and learned nothing.
features = ['DepartingHour', 'Price', 'FlightName']
del_blr_x = del_blr_data[features]
del_blr_y = del_blr_data['ArrivingCity']
# Fixed seed so the fitted tree is the same on every run.
del_blr_dtree = DecisionTreeClassifier(random_state=0)
del_blr_dtree = del_blr_dtree.fit(del_blr_x, del_blr_y)
tree.plot_tree(del_blr_dtree, feature_names=features)
[Text(0.5, 0.75, 'ArrivingCity <= 0.5\ngini = 0.5\nsamples = 95\nvalue = [48, 47]'), Text(0.25, 0.25, 'gini = 0.0\nsamples = 48\nvalue = [48, 0]'), Text(0.75, 0.25, 'gini = 0.0\nsamples = 47\nvalue = [0, 47]')]
# Decision tree predicting the arrival city of a New Delhi <-> Mumbai flight.
# Encode the airline name as an integer code so the tree can consume it.
# NOTE: the column is overwritten in place, so later cells see codes, not names,
# and running this cell twice maps the codes to NaN.
del_bom_d = {'Akasa Air':0, 'IndiGo':1, 'Air India':2, 'SpiceJet':3, 'Vistara':4}
del_bom_data['FlightName'] = del_bom_data['FlightName'].map(del_bom_d)
# Encode the arrival city (the classification target) as 0/1.
del_bom_d = {'Mumbai':0, 'New Delhi':1}
del_bom_data['ArrivingCity'] = del_bom_data['ArrivingCity'].map(del_bom_d)
# The target must not be among the inputs. With 'ArrivingCity' in this list the
# tree consisted of a single split on the label itself and learned nothing.
features = ['DepartingHour', 'Price', 'FlightName']
del_bom_x = del_bom_data[features]
del_bom_y = del_bom_data['ArrivingCity']
# Fixed seed so the fitted tree is the same on every run.
del_bom_dtree = DecisionTreeClassifier(random_state=0)
del_bom_dtree = del_bom_dtree.fit(del_bom_x, del_bom_y)
tree.plot_tree(del_bom_dtree, feature_names=features)
[Text(0.5, 0.75, 'ArrivingCity <= 0.5\ngini = 0.5\nsamples = 100\nvalue = [50, 50]'), Text(0.25, 0.25, 'gini = 0.0\nsamples = 50\nvalue = [50, 0]'), Text(0.75, 0.25, 'gini = 0.0\nsamples = 50\nvalue = [0, 50]')]
# Decision tree predicting the arrival city of a New Delhi <-> Kolkata flight.
# Encode the airline name as an integer code so the tree can consume it.
# NOTE: the column is overwritten in place, so later cells see codes, not names,
# and running this cell twice maps the codes to NaN.
del_ccu_d = {'IndiGo':0, 'Vistara':1, 'SpiceJet':2, 'Air India':3}
del_ccu_data['FlightName'] = del_ccu_data['FlightName'].map(del_ccu_d)
# Encode the arrival city (the classification target) as 0/1.
del_ccu_d = {'Kolkata':0, 'New Delhi':1}
del_ccu_data['ArrivingCity'] = del_ccu_data['ArrivingCity'].map(del_ccu_d)
# The target must not be among the inputs. With 'ArrivingCity' in this list the
# tree consisted of a single split on the label itself and learned nothing.
features = ['DepartingHour', 'Price', 'FlightName']
del_ccu_x = del_ccu_data[features]
del_ccu_y = del_ccu_data['ArrivingCity']
# Fixed seed so the fitted tree is the same on every run.
del_ccu_dtree = DecisionTreeClassifier(random_state=0)
del_ccu_dtree = del_ccu_dtree.fit(del_ccu_x, del_ccu_y)
tree.plot_tree(del_ccu_dtree, feature_names=features)
[Text(0.5, 0.75, 'ArrivingCity <= 0.5\ngini = 0.5\nsamples = 60\nvalue = [30, 30]'), Text(0.25, 0.25, 'gini = 0.0\nsamples = 30\nvalue = [30, 0]'), Text(0.75, 0.25, 'gini = 0.0\nsamples = 30\nvalue = [0, 30]')]
# Decision tree predicting the arrival city of a New Delhi <-> Hyderabad flight.
# Encode the airline name as an integer code so the tree can consume it.
# NOTE: the column is overwritten in place, so later cells see codes, not names,
# and running this cell twice maps the codes to NaN.
del_hyd_d = {'Akasa Air':0, 'IndiGo':1, 'Air India':2, 'Vistara':3, 'SpiceJet':4}
del_hyd_data['FlightName'] = del_hyd_data['FlightName'].map(del_hyd_d)
# Encode the arrival city (the classification target) as 0/1.
del_hyd_d = {'Hyderabad':0, 'New Delhi':1}
del_hyd_data['ArrivingCity'] = del_hyd_data['ArrivingCity'].map(del_hyd_d)
# The target must not be among the inputs. With 'ArrivingCity' in this list the
# tree consisted of a single split on the label itself and learned nothing.
features = ['DepartingHour', 'Price', 'FlightName']
del_hyd_x = del_hyd_data[features]
del_hyd_y = del_hyd_data['ArrivingCity']
# Fixed seed so the fitted tree is the same on every run.
del_hyd_dtree = DecisionTreeClassifier(random_state=0)
del_hyd_dtree = del_hyd_dtree.fit(del_hyd_x, del_hyd_y)
tree.plot_tree(del_hyd_dtree, feature_names=features)
[Text(0.5, 0.75, 'ArrivingCity <= 0.5\ngini = 0.5\nsamples = 59\nvalue = [30, 29]'), Text(0.25, 0.25, 'gini = 0.0\nsamples = 30\nvalue = [30, 0]'), Text(0.75, 0.25, 'gini = 0.0\nsamples = 29\nvalue = [0, 29]')]
# Mumbai <-> Bengaluru: categorical scatter of fare per departure hour, coloured
# by departing city (DepartingCity is never re-encoded, so it still holds names).
sns.catplot(bom_blr_data, x='DepartingHour', y='Price', hue='DepartingCity', height=5.5)
<seaborn.axisgrid.FacetGrid at 0x1c2a137d590>
# New Delhi <-> Bengaluru: categorical scatter of fare per departure hour, coloured
# by departing city (DepartingCity is never re-encoded, so it still holds names).
sns.catplot(del_blr_data, x='DepartingHour', y='Price', hue='DepartingCity', height=5.5)
<seaborn.axisgrid.FacetGrid at 0x1c2a10aa2d0>
# New Delhi <-> Mumbai: categorical scatter of fare per departure hour, coloured
# by departing city (DepartingCity is never re-encoded, so it still holds names).
sns.catplot(del_bom_data, x='DepartingHour', y='Price', hue='DepartingCity', height=5.5)
<seaborn.axisgrid.FacetGrid at 0x1c2a1363a50>
# New Delhi <-> Kolkata: categorical scatter of fare per departure hour, coloured
# by departing city (DepartingCity is never re-encoded, so it still holds names).
sns.catplot(del_ccu_data, x='DepartingHour', y='Price', hue='DepartingCity', height=5.5)
<seaborn.axisgrid.FacetGrid at 0x1c2a2974c10>
# New Delhi <-> Hyderabad: categorical scatter of fare per departure hour, coloured
# by departing city (DepartingCity is never re-encoded, so it still holds names).
sns.catplot(del_hyd_data, x='DepartingHour', y='Price', hue='DepartingCity', height=5.5)
<seaborn.axisgrid.FacetGrid at 0x1c2a2ee8e90>
# Mumbai <-> Bengaluru: fare distribution per arrival city, coloured by departing city.
# NOTE(review): if the cells ran in file order, ArrivingCity already holds the 0/1
# codes written by the decision-tree cell (0 = Bengaluru, 1 = Mumbai), so the x
# axis shows codes rather than city names.
sns.violinplot(bom_blr_data, x='ArrivingCity', y='Price', hue='DepartingCity', dodge=False)
<Axes: xlabel='ArrivingCity', ylabel='Price'>
# New Delhi <-> Bengaluru: fare distribution per arrival city, coloured by departing city.
# NOTE(review): if the cells ran in file order, ArrivingCity already holds the 0/1
# codes written by the decision-tree cell (0 = Bengaluru, 1 = New Delhi), so the x
# axis shows codes rather than city names.
sns.violinplot(del_blr_data, x='ArrivingCity', y='Price', hue='DepartingCity', dodge=False)
<Axes: xlabel='ArrivingCity', ylabel='Price'>
# New Delhi <-> Mumbai: fare distribution per arrival city, coloured by departing city.
# NOTE(review): if the cells ran in file order, ArrivingCity already holds the 0/1
# codes written by the decision-tree cell (0 = Mumbai, 1 = New Delhi), so the x
# axis shows codes rather than city names.
sns.violinplot(del_bom_data, x='ArrivingCity', y='Price', hue='DepartingCity', dodge=False)
<Axes: xlabel='ArrivingCity', ylabel='Price'>
# New Delhi <-> Kolkata: fare distribution per arrival city, coloured by departing city.
# NOTE(review): if the cells ran in file order, ArrivingCity already holds the 0/1
# codes written by the decision-tree cell (0 = Kolkata, 1 = New Delhi), so the x
# axis shows codes rather than city names.
sns.violinplot(del_ccu_data, x='ArrivingCity', y='Price', hue='DepartingCity', dodge=False)
<Axes: xlabel='ArrivingCity', ylabel='Price'>
# New Delhi <-> Hyderabad: fare distribution per arrival city, coloured by departing city.
# NOTE(review): if the cells ran in file order, ArrivingCity already holds the 0/1
# codes written by the decision-tree cell (0 = Hyderabad, 1 = New Delhi), so the x
# axis shows codes rather than city names.
sns.violinplot(del_hyd_data, x='ArrivingCity', y='Price', hue='DepartingCity', dodge=False)
<Axes: xlabel='ArrivingCity', ylabel='Price'>